package com.scala.learn.sparksql2

import org.apache.spark.sql.{Dataset, SparkSession}

/**
  * @Copyright: Shanghai Definesys Company.All rights reserved.
  * @Description:
  * @author: chuhaitao
  * @since: 2019/3/9 20:48
  * @history:
  *          1.2019/3/9 created by chuhaitao
  */
object DataSetWorldCount {

  /**
    * Word-count demo using the Spark Dataset API: reads a text file,
    * splits each line on spaces, and prints the count of each word.
    *
    * @param args optional; args(0) overrides the default input file path.
    */
  def main(args: Array[String]): Unit = {
    // Allow the input path to be supplied on the command line;
    // fall back to the original hard-coded location for compatibility.
    val inputPath = args.headOption.getOrElse("D:\\tmp\\world.txt")

    val sparkSession = SparkSession.builder()
      .appName("wc")
      .master("local")
      .getOrCreate()

    // Ensure the session is stopped even if the job throws,
    // so driver resources are always released.
    try {
      // Each line of the file becomes one row of a Dataset[String]
      // with a single column named "value".
      val lines: Dataset[String] = sparkSession.read.textFile(inputPath)
      lines.show()

      // Implicit conversions: needed for the $"..." column syntax
      // and the String encoder used by flatMap below.
      import sparkSession.implicits._

      // Split every line into individual words.
      val words: Dataset[String] = lines.flatMap(_.split(" "))
      words.show()

      // SQL aggregate functions (count).
      import org.apache.spark.sql.functions._

      // Group identical words and count their occurrences.
      // (Original code aliased the column "world" — typo for "word".)
      val dataFrame = words.groupBy($"value" as "word").agg(count("*") as "count")
      dataFrame.show()
    } finally {
      sparkSession.stop()
    }
  }
}
